Data Extraction with Python, extract from the datasets the proportion of the vote won by the Democratic candidate.

library(reticulate)
import pandas as pd
import numpy as np

pres = pd.read_csv('data/countypres_2000-2016.csv')
unemp = pd.read_excel('data/Unemployment.xls', header = 7)
pres['proportion'] = pres['candidatevotes']/pres['totalvotes']
pres = pres[(pres.year == 2008)|(pres.year == 2012)|(pres.year == 2016)]
pres = pres[pres.party == 'democrat']
unemp = unemp[['area_name','Unemployment_rate_2008','Unemployment_rate_2012','Unemployment_rate_2016']]
pres.head()
##        year    state state_po  ... totalvotes   version proportion
## 21815  2008  Alabama       AL  ...      23641  20191203   0.257730
## 21818  2008  Alabama       AL  ...      81413  20191203   0.238119
## 21821  2008  Alabama       AL  ...      11630  20191203   0.489854
## 21824  2008  Alabama       AL  ...       8644  20191203   0.265965
## 21827  2008  Alabama       AL  ...      24267  20191203   0.145135
## 
## [5 rows x 12 columns]
unemp.head()
##             area_name  ...  Unemployment_rate_2016
## 0       United States  ...                4.869007
## 1             Alabama  ...                5.800000
## 2  Autauga County, AL  ...                5.100000
## 3  Baldwin County, AL  ...                5.300000
## 4  Barbour County, AL  ...                8.300000
## 
## [5 rows x 4 columns]

Create 6 Choropleths with R

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.4     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(sf)
## Linking to GEOS 3.8.1, GDAL 3.1.1, PROJ 6.3.1
library(readxl)
library(spData)
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(sp)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(ggpubr)
# read the two tables
pres <- read_csv('data/countypres_2000-2016.csv')
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   year = col_double(),
##   state = col_character(),
##   state_po = col_character(),
##   county = col_character(),
##   FIPS = col_double(),
##   office = col_character(),
##   candidate = col_character(),
##   party = col_character(),
##   candidatevotes = col_double(),
##   totalvotes = col_double(),
##   version = col_double()
## )
pres$FIPS <- as.character(pres$FIPS)
unemp <- read_excel("data/Unemployment.xls", range = cell_limits(c(8, 1), c(NA, NA)))
unemp <- unemp %>%
  select(FIPStxt, Unemployment_rate_2008, Unemployment_rate_2012, Unemployment_rate_2016)

# county
county <- st_read('data/tl_2019_us_county-1/tl_2019_us_county.shp')
## Reading layer `tl_2019_us_county' from data source `/Users/Mary/Desktop/ANLY 503/anly503-fall2020-a6-mengyu14/data/tl_2019_us_county-1/tl_2019_us_county.shp' using driver `ESRI Shapefile'
## Simple feature collection with 3233 features and 17 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -179.2311 ymin: -14.60181 xmax: 179.8597 ymax: 71.43979
## geographic CRS: NAD83
county <- county %>%
  filter(STATEFP == '06'|STATEFP == 42|STATEFP == 48)

TX

txp <- pres %>%
  filter((year == 2008|year == 2012|year == 2016) & party == 'democrat' & state == 'Texas')
txp['proportion'] = txp['candidatevotes']/txp['totalvotes']

txp <- txp %>%
  left_join(county, by = c('FIPS'='GEOID'))


T11 <- txp %>%
  filter(year == 2008)
T12 <- txp %>%
  filter(year == 2012)
T13 <- txp %>%
  filter(year == 2016)

T11 <- ggplot(T11, aes(geometry = geometry)) +
  geom_sf(aes(fill = proportion)) +
  scale_fill_distiller(name = "Proportion of Votes", palette = "Blues", direction = 1) +
  ggtitle("Proportion of Votes for Democrat Candidate of TX in 2008")

T12 <- ggplot(T12, aes(geometry = geometry)) +
  geom_sf(aes(fill = proportion)) +
  scale_fill_distiller(name = "Proportion of Votes", palette = "Blues", direction = 1) +
  ggtitle("Proportion of Votes for Democrat Candidate of TX in 2012")

T13 <- ggplot(T13,aes(geometry = geometry)) +
  geom_sf(aes(fill = proportion)) +
  scale_fill_distiller(name = "Proportion of Votes", palette = "Blues", direction = 1) +
  ggtitle("Proportion of Votes for Democrat Candidate of TX in 2016")
txun <- unemp %>%
  filter(substr(FIPStxt, 1, 2) == '48') %>%
  left_join(county, by = c('FIPStxt' = 'GEOID')) %>%
  tail(-1)

T21 <- ggplot(txun, aes(geometry = geometry)) +
  geom_sf(aes(fill = Unemployment_rate_2008)) +
  scale_fill_distiller(name="Unemployment rate", palette = "Reds", direction = 1) +
  ggtitle("Unemployment rate of TX in 2008")

T22 <- ggplot(txun, aes(geometry = geometry)) +
  geom_sf(aes(fill = Unemployment_rate_2012)) +
  scale_fill_distiller(name="Unemployment rate", palette = "Reds", direction = 1) +
  ggtitle("Unemployment rate of TX in 2012")

T23 <- ggplot(txun, aes(geometry = geometry)) +
  geom_sf(aes(fill = Unemployment_rate_2016)) +
  scale_fill_distiller(name="Unemployment rate", palette = "Reds", direction = 1) +
  ggtitle("Unemployment rate of TX in 2016")
ggarrange(T11, T21, T12, T22, T13, T23, ncol = 2, nrow=3, align = 'hv', widths = c(6, 6, 6, 6, 6, 6))

CA

cap <- pres %>%
  filter((year == 2008|year == 2012|year == 2016) & party == 'democrat' & state == 'California')
cap['proportion'] = cap['candidatevotes']/cap['totalvotes']
cap$FIPS <- paste0('0', cap$FIPS)
cap <- cap %>%
  left_join(county, by = c('FIPS' = 'GEOID'))


C11 <- cap %>%
  filter(year == 2008)
C12 <- cap %>%
  filter(year == 2012)
C13 <- cap %>%
  filter(year == 2016)

C11 <- ggplot(C11, aes(geometry = geometry)) +
  geom_sf(aes(fill = proportion)) +
  scale_fill_distiller(name = "Proportion of Votes", palette = "Blues", direction = 1) +
  ggtitle("Proportion of Votes for Democrat Candidate of CA in 2008")

C12 <- ggplot(C12, aes(geometry = geometry)) +
  geom_sf(aes(fill = proportion)) +
  scale_fill_distiller(name = "Proportion of Votes", palette = "Blues", direction = 1) +
  ggtitle("Proportion of Votes for Democrat Candidate of CA in 2012")

C13 <- ggplot(C13, aes(geometry = geometry)) +
  geom_sf(aes(fill = proportion)) +
  scale_fill_distiller(name = "Proportion of Votes", palette = "Blues", direction = 1) +
  ggtitle("Proportion of Votes for Democrat Candidate of CA in 2016")
caun <- unemp %>%
  filter(substr(FIPStxt, 1, 2) == '06') %>%
  left_join(county, by = c('FIPStxt' = 'GEOID')) %>%
  tail(-1)

C21 <- ggplot(caun, aes(geometry = geometry)) +
  geom_sf(aes(fill = Unemployment_rate_2008)) +
  scale_fill_distiller(name = "Unemployment rate", palette = "Reds", direction = 1) +
  ggtitle("Unemployment rate of CA in 2008")

C22 <- ggplot(caun, aes(geometry = geometry)) +
  geom_sf(aes(fill = Unemployment_rate_2012)) +
  scale_fill_distiller(name = "Unemployment rate", palette = "Reds", direction = 1) +
  ggtitle("Unemployment rate of CA in 2012")

C23 <- ggplot(caun, aes(geometry = geometry)) +
  geom_sf(aes(fill = Unemployment_rate_2016)) +
  scale_fill_distiller(name = "Unemployment rate", palette = "Reds", direction = 1) +
  ggtitle("Unemployment rate of CA in 2016")
ggarrange(C11, C21, C12, C22, C13, C23, ncol = 2, nrow=3, align = 'hv', widths = c(6, 6, 6, 6, 6, 6))

PA

pap <- pres %>%
  filter((year == 2008|year == 2012|year == 2016) & party == 'democrat' & state == 'Pennsylvania')
pap['proportion'] = pap['candidatevotes']/pap['totalvotes']

pap <- pap %>%
  left_join(county, by = c('FIPS' = 'GEOID'))


P11 <- pap %>%
  filter(year == 2008)
P12 <- pap %>%
  filter(year == 2012)
P13 <- pap %>%
  filter(year == 2016)

P11 <- ggplot(P11, aes(geometry = geometry)) +
  geom_sf(aes(fill = proportion)) +
  scale_fill_distiller(name = "Proportion of Votes", palette = "Blues", direction = 1) +
  ggtitle("Proportion of Votes for Democrat Candidate of PA in 2008")

P12 <- ggplot(P12, aes(geometry = geometry)) +
  geom_sf(aes(fill = proportion)) +
  scale_fill_distiller(name = "Proportion of Votes", palette = "Blues", direction = 1) +
  ggtitle("Proportion of Votes for Democrat Candidate of PA in 2012")

P13 <- ggplot(P13, aes(geometry = geometry)) +
  geom_sf(aes(fill = proportion)) +
  scale_fill_distiller(name = "Proportion of Votes", palette = "Blues", direction = 1) +
  ggtitle("Proportion of Votes for Democrat Candidate of PA in 2016")
paun <- unemp %>%
  filter(substr(FIPStxt, 1, 2) == '42') %>%
  left_join(county, by = c('FIPStxt' = 'GEOID')) %>%
  tail(-1)

P21 <- ggplot(paun, aes(geometry = geometry)) +
  geom_sf(aes(fill = Unemployment_rate_2008)) +
  scale_fill_distiller(name = "Unemployment rate", palette = "Reds", direction = 1) +
  ggtitle("Unemployment rate of PA in 2008")

P22 <- ggplot(paun, aes(geometry = geometry)) +
  geom_sf(aes(fill = Unemployment_rate_2012)) +
  scale_fill_distiller(name = "Unemployment rate", palette = "Reds", direction = 1) +
  ggtitle("Unemployment rate of PA in 2012")

P23 <- ggplot(paun, aes(geometry = geometry)) +
  geom_sf(aes(fill = Unemployment_rate_2016)) +
  scale_fill_distiller(name = "Unemployment rate", palette = "Reds", direction = 1) +
  ggtitle("Unemployment rate of PA in 2016")
ggarrange(P11, P21, P12, P22, P13, P23, ncol = 2, nrow=3, align = 'hv', widths = c(6, 6, 6, 6, 6, 6))